# This script reproduces Figure 6 #
rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")
source("functions.R")
# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

##########
d_sub1$log_final_price_mean <- log(d_sub1$final_price_mean)
d_sub1$log_final_price_sd <- log(d_sub1$final_price_sd + .1)
d_sub1$log_final_price_sum <- log(d_sub1$final_price_sum + 1)

d_sub1$log_final_price_sum[which(is.na(d_sub1$log_final_price_sum))] <- 0

var_names <- c("log_final_price_sum", "log_final_price_mean", "log_final_price_sd",
               "density_mean", "density_sd", 
               "FAR_mean", "FAR_sd")

for (i in 1:length(var_names)) {
  d_sub1 <- missing_indicator(var_names[i],
                              d_sub1)
}

d_sub1$pdd_1500_sum[which(d_sub1$pdd_1500_sum == "NaN")] <- NA
d_sub1$pdd_1500_mean[which(d_sub1$pdd_1500_mean == "NaN")] <- NA
d_sub1$pdd_1000_mean[which(d_sub1$pdd_1000_mean == "NaN")] <- NA
d_sub1$pdd_1000_sum[which(d_sub1$pdd_1000_sum == "NaN")] <- NA
d_sub1$pdd_1000_sum[which(d_sub1$pdd_1000_sum == "NaN")] <- NA
d_sub1$pdd_500_sum[which(d_sub1$pdd_500_sum == "NaN")] <- NA
d_sub1$pdd_500_mean[which(d_sub1$pdd_500_mean == "NaN")] <- NA

d_sub1$pdd_pa_1500_sum[which(d_sub1$pdd_pa_1500_sum == "NaN")] <- NA
d_sub1$pdd_pa_1500_mean[which(d_sub1$pdd_pa_1500_mean == "NaN")] <- NA
d_sub1$pdd_pa_1000_mean[which(d_sub1$pdd_pa_1000_mean == "NaN")] <- NA
d_sub1$pdd_pa_1000_sum[which(d_sub1$pdd_pa_1000_sum == "NaN")] <- NA
d_sub1$pdd_pa_1000_sum[which(d_sub1$pdd_pa_1000_sum == "NaN")] <- NA
d_sub1$pdd_pa_500_sum[which(d_sub1$pdd_pa_500_sum == "NaN")] <- NA
d_sub1$pdd_pa_500_mean[which(d_sub1$pdd_pa_500_mean == "NaN")] <- NA

d_sub1$bpd_1500_sum[which(d_sub1$bpd_1500_sum == "NaN")] <- NA
d_sub1$bpd_1500_mean[which(d_sub1$bpd_1500_mean == "NaN")] <- NA
d_sub1$bpd_1000_mean[which(d_sub1$bpd_1000_mean == "NaN")] <- NA
d_sub1$bpd_1000_sum[which(d_sub1$bpd_1000_sum == "NaN")] <- NA
d_sub1$bpd_1000_sum[which(d_sub1$bpd_1000_sum == "NaN")] <- NA
d_sub1$bpd_500_sum[which(d_sub1$bpd_500_sum == "NaN")] <- NA
d_sub1$bpd_500_mean[which(d_sub1$bpd_500_mean == "NaN")] <- NA

d_sub1$bpd_pa_1500_sum[which(d_sub1$bpd_pa_1500_sum == "NaN")] <- NA
d_sub1$bpd_pa_1500_mean[which(d_sub1$bpd_pa_1500_mean == "NaN")] <- NA
d_sub1$bpd_pa_1000_mean[which(d_sub1$bpd_pa_1000_mean == "NaN")] <- NA
d_sub1$bpd_pa_1000_sum[which(d_sub1$bpd_pa_1000_sum == "NaN")] <- NA
d_sub1$bpd_pa_1000_sum[which(d_sub1$bpd_pa_1000_sum == "NaN")] <- NA
d_sub1$bpd_pa_500_sum[which(d_sub1$bpd_pa_500_sum == "NaN")] <- NA
d_sub1$bpd_pa_500_mean[which(d_sub1$bpd_pa_500_mean == "NaN")] <- NA

d_sub1$fpd_1500_sum[which(d_sub1$fpd_1500_sum == "NaN")] <- NA
d_sub1$fpd_1500_mean[which(d_sub1$fpd_1500_mean == "NaN")] <- NA
d_sub1$fpd_1000_mean[which(d_sub1$fpd_1000_mean == "NaN")] <- NA
d_sub1$fpd_1000_sum[which(d_sub1$fpd_1000_sum == "NaN")] <- NA
d_sub1$fpd_1000_sum[which(d_sub1$fpd_1000_sum == "NaN")] <- NA
d_sub1$fpd_500_sum[which(d_sub1$fpd_500_sum == "NaN")] <- NA
d_sub1$fpd_500_mean[which(d_sub1$fpd_500_mean == "NaN")] <- NA

d_sub1$fpd_pa_1500_sum[which(d_sub1$fpd_pa_1500_sum == "NaN")] <- NA
d_sub1$fpd_pa_1500_mean[which(d_sub1$fpd_pa_1500_mean == "NaN")] <- NA
d_sub1$fpd_pa_1000_mean[which(d_sub1$fpd_pa_1000_mean == "NaN")] <- NA
d_sub1$fpd_pa_1000_sum[which(d_sub1$fpd_pa_1000_sum == "NaN")] <- NA
d_sub1$fpd_pa_1000_sum[which(d_sub1$fpd_pa_1000_sum == "NaN")] <- NA
d_sub1$fpd_pa_500_sum[which(d_sub1$fpd_pa_500_sum == "NaN")] <- NA
d_sub1$fpd_pa_500_mean[which(d_sub1$fpd_pa_500_mean == "NaN")] <- NA

## drawing circles, 0 them ##
d_sub1$pdd_1500_sum[which(is.na(d_sub1$pdd_1500_sum))] <- 0
d_sub1$pdd_1500_mean[which(is.na(d_sub1$pdd_1500_mean))] <- 0
d_sub1$pdd_1000_sum[which(is.na(d_sub1$pdd_1000_sum))] <- 0
d_sub1$pdd_1000_mean[which(is.na(d_sub1$pdd_1000_mean))] <- 0
d_sub1$pdd_500_sum[which(is.na(d_sub1$pdd_500_sum))] <- 0
d_sub1$pdd_500_mean[which(is.na(d_sub1$pdd_500_mean))] <- 0

d_sub1$pdd_pa_1500_sum[which(is.na(d_sub1$pdd_pa_1500_sum))] <- 0
d_sub1$pdd_pa_1500_mean[which(is.na(d_sub1$pdd_pa_1500_mean))] <- 0
d_sub1$pdd_pa_1000_sum[which(is.na(d_sub1$pdd_pa_1000_sum))] <- 0
d_sub1$pdd_pa_1000_mean[which(is.na(d_sub1$pdd_pa_1000_mean))] <- 0
d_sub1$pdd_pa_500_sum[which(is.na(d_sub1$pdd_pa_500_sum))] <- 0
d_sub1$pdd_pa_500_mean[which(is.na(d_sub1$pdd_pa_500_mean))] <- 0

d_sub1$bpd_1500_sum[which(is.na(d_sub1$bpd_1500_sum))] <- 0
d_sub1$bpd_1500_mean[which(is.na(d_sub1$bpd_1500_mean))] <- 0
d_sub1$bpd_1000_sum[which(is.na(d_sub1$bpd_1000_sum))] <- 0
d_sub1$bpd_1000_mean[which(is.na(d_sub1$bpd_1000_mean))] <- 0
d_sub1$bpd_500_sum[which(is.na(d_sub1$bpd_500_sum))] <- 0
d_sub1$bpd_500_mean[which(is.na(d_sub1$bpd_500_mean))] <- 0

d_sub1$bpd_pa_1500_sum[which(is.na(d_sub1$bpd_pa_1500_sum))] <- 0
d_sub1$bpd_pa_1500_mean[which(is.na(d_sub1$bpd_pa_1500_mean))] <- 0
d_sub1$bpd_pa_1000_sum[which(is.na(d_sub1$bpd_pa_1000_sum))] <- 0
d_sub1$bpd_pa_1000_mean[which(is.na(d_sub1$bpd_pa_1000_mean))] <- 0
d_sub1$bpd_pa_500_sum[which(is.na(d_sub1$bpd_pa_500_sum))] <- 0
d_sub1$bpd_pa_500_mean[which(is.na(d_sub1$bpd_pa_500_mean))] <- 0

d_sub1$fpd_1500_sum[which(is.na(d_sub1$fpd_1500_sum))] <- 0
d_sub1$fpd_1500_mean[which(is.na(d_sub1$fpd_1500_mean))] <- 0
d_sub1$fpd_1000_sum[which(is.na(d_sub1$fpd_1000_sum))] <- 0
d_sub1$fpd_1000_mean[which(is.na(d_sub1$fpd_1000_mean))] <- 0
d_sub1$fpd_500_sum[which(is.na(d_sub1$fpd_500_sum))] <- 0
d_sub1$fpd_500_mean[which(is.na(d_sub1$fpd_500_mean))] <- 0

d_sub1$fpd_pa_1500_sum[which(is.na(d_sub1$fpd_pa_1500_sum))] <- 0
d_sub1$fpd_pa_1500_mean[which(is.na(d_sub1$fpd_pa_1500_mean))] <- 0
d_sub1$fpd_pa_1000_sum[which(is.na(d_sub1$fpd_pa_1000_sum))] <- 0
d_sub1$fpd_pa_1000_mean[which(is.na(d_sub1$fpd_pa_1000_mean))] <- 0
d_sub1$fpd_pa_500_sum[which(is.na(d_sub1$fpd_pa_500_sum))] <- 0
d_sub1$fpd_pa_500_mean[which(is.na(d_sub1$fpd_pa_500_mean))] <- 0


#### spatial regression: 0 them ####
d_sub1$abs_resid_log_begi_price_mean[which(is.na(d_sub1$abs_resid_log_begi_price_mean))] <- 0
d_sub1$abs_resid_log_final_price_mean[which(is.na(d_sub1$abs_resid_log_final_price_mean))] <- 0

d_sub1$abs_resid_log_begi_price_pa_mean[which(is.na(d_sub1$abs_resid_log_begi_price_pa_mean))] <- 0
d_sub1$abs_resid_log_final_price_pa_mean[which(is.na(d_sub1$abs_resid_log_final_price_pa_mean))] <- 0

d_sub1$abs_resid_log_price_difference_mean[which(is.na(d_sub1$abs_resid_log_price_difference_mean))] <- 0
d_sub1$abs_resid_log_price_difference_pa_mean[which(is.na(d_sub1$abs_resid_log_price_difference_pa_mean))] <- 0

d_sub1$abs_resid_log_begi_price_mean[which(d_sub1$abs_resid_log_begi_price_pa_mean == "NaN")] <- NA
d_sub1$abs_resid_log_final_price_mean[which(d_sub1$abs_resid_log_final_price_pa_mean == "NaN")] <- NA

d_sub1$abs_resid_log_begi_price_pa_mean[which(d_sub1$abs_resid_log_begi_price_pa_mean == "NaN")] <- NA
d_sub1$abs_resid_log_final_price_pa_mean[which(d_sub1$abs_resid_log_final_price_pa_mean == "NaN")] <- NA

d_sub1$abs_resid_log_price_difference_pa_mean[which(d_sub1$abs_resid_log_price_difference_pa_mean == "NaN")] <- NA
d_sub1$abs_resid_log_price_difference_mean[which(d_sub1$abs_resid_log_price_difference_mean == "NaN")] <- NA

###
d_sub1 <- d_sub1[order(d_sub1$county_id, d_sub1$time),]



# house-cleaning
var_names <- c("fpd_1500_mean", "fpd_1000_mean", "fpd_500_mean",
               "abs_resid_log_final_price_mean")

for (i in 1:length(var_names)) {
  d_sub1[, var_names[i]] <- standardize(d_sub1[, var_names[i]])
}

# running causal mediation analyses manually
var_names <- c("log_arrests_f1_sd", "fpd_500_mean", "abs_resid_log_final_price_mean")

covariates1 <- "log_arrests_f1_sd_missing + log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
log_hos_l1 + log_industrial_l1 + log_rev_l1 +
log_expd_l1 + log_middle_school_l1 + msec2currentsec_l1 + 
msec2currentgvn_l1 + 
mayor2currentsec_l1 + 
#mayor2currentsec2_l1 + 
mayor2currentgvn_l1 + as.factor(county_id) + as.factor(time)"  

covariates2 <- "log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
log_hos_l1 + log_industrial_l1 + log_rev_l1 +
log_expd_l1 + log_middle_school_l1 + msec2currentsec_l1 + 
msec2currentgvn_l1 + 
mayor2currentsec_l1 + 
#mayor2currentsec2_l1 + 
mayor2currentgvn_l1 + as.factor(county_id) + as.factor(time)" 

mediation_results <- list()

for (i in 1:length(var_names)) {
  
  if (i == 1){
    formula_med <- as.formula(paste0(var_names[i], "~", "tour_sd + ", covariates1))  
    
    formula_direct <- as.formula(paste0("lead(log_transaction_area,1)", "~", "tour_sd + ",
                                        paste0(var_names[i], "+"), covariates1))
    formula_ate <- as.formula(paste0("lead(log_transaction_area,1)", "~", "tour_sd + ", covariates2))  
  } else {
    formula_med <- as.formula(paste0(var_names[i], "~", "tour_sd + ", covariates2))  
    
    formula_direct <- as.formula(paste0("lead(log_transaction_area,1)", "~", "tour_sd + ",
                                        paste0(var_names[i], "+"), covariates2))
    formula_ate <- as.formula(paste0("lead(log_transaction_area,1)", "~", "tour_sd + ", covariates2))
  }
  
  model_area_con_direct_f1 <- lm(formula_direct,
                                 data = d_sub1)
  vcov_area_con_direct_f1 <- clubSandwich::vcovCR(model_area_con_direct_f1, type="CR1S", cluster = d_sub1$county_id)
  
  model_area_con_med_f1 <- lm(formula_med,
                              data = d_sub1)
  vcov_area_con_med_f1 <- clubSandwich::vcovCR(model_area_con_med_f1, type="CR1S", cluster = d_sub1$county_id)
  
  model_area_con_ate_f1 <- lm(formula_ate,
                              data = d_sub1)
  vcov_area_con_ate_f1 <- clubSandwich::vcovCR(model_area_con_ate_f1, type="CR1S", cluster = d_sub1$county_id)
  
  ## ATE
  ATE_area_p <- coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Estimate"]
  ATE_area_L_95 <- coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Estimate"] -
    qnorm(.975) * coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Std. Error"]
  ATE_area_U_95 <- coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Estimate"] +
    qnorm(.975) * coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Std. Error"]
  ATE_area_L_90 <- coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Estimate"] -
    qnorm(.95) * coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Std. Error"]
  ATE_area_U_90 <- coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Estimate"] +
    qnorm(.95) * coeftest(model_area_con_ate_f1, vcov_area_con_ate_f1)["tour_sd","Std. Error"]
  
  
  ## ADE
  ADE_area_p <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Estimate"]
  ADE_area_L_95 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Estimate"] -
    qnorm(.975) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Std. Error"]
  ADE_area_U_95 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Estimate"] +
    qnorm(.975) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Std. Error"]
  ADE_area_L_90 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Estimate"] -
    qnorm(.95) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Std. Error"]
  ADE_area_U_90 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Estimate"] +
    qnorm(.95) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)["tour_sd","Std. Error"]
  
  
  ## INDEP
  INDEP_area_p <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"]
  INDEP_area_L_95 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"] -
    qnorm(.975) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Std. Error"]
  INDEP_area_U_95 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"] +
    qnorm(.975) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Std. Error"]
  INDEP_area_L_90 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"] -
    qnorm(.95) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Std. Error"]
  INDEP_area_U_90 <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"] +
    qnorm(.95) * coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Std. Error"]
  
  
  ## ACME
  beta <- coeftest(model_area_con_med_f1, vcov_area_con_med_f1)["tour_sd","Estimate"]
  gamma <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Estimate"]
  var_beta <- coeftest(model_area_con_med_f1, vcov_area_con_med_f1)["tour_sd","Std. Error"]^2
  var_gamma <- coeftest(model_area_con_direct_f1, vcov_area_con_direct_f1)[var_names[i],"Std. Error"]^2
  
  ACME_area_se <- sqrt(beta^2 * var_gamma + gamma^2 * var_beta)
  ACME_area_p <- beta * gamma
  ACME_area_L_95 <- ACME_area_p - qnorm(.975) * ACME_area_se
  ACME_area_U_95 <- ACME_area_p + qnorm(.975) * ACME_area_se
  ACME_area_L_90 <- ACME_area_p - qnorm(.95) * ACME_area_se
  ACME_area_U_90 <- ACME_area_p + qnorm(.95) * ACME_area_se
  
  mediation_results[[i]] <- list(c(ATE_area_L_90, ATE_area_p, ATE_area_U_90),
                                 c(ATE_area_L_95, ATE_area_p, ATE_area_U_95),
                                 
                                 c(ADE_area_L_90, ADE_area_p, ADE_area_U_90),
                                 c(ADE_area_L_95, ADE_area_p, ADE_area_U_95),
                                 
                                 c(ACME_area_L_90, ACME_area_p, ACME_area_U_90),
                                 c(ACME_area_L_95, ACME_area_p, ACME_area_U_95))
  
}

## Making figure
xlabs <- c("Effect of Inspection on Land Area (t+1)",
           "Effect of Inspection on Land Area (t+1)",
           "Effect of Inspection on Land Area (t+1)")
mains <- c("Number of Arrests as the Mediator", 
           "Corruption potential as the mediator, \n measured by price residuals \n within 500m radius",
           "Corruption potential as the mediator, \n measured by price residuals \n from Spatial LASSO")

pdf(file = "output/Figure_6.pdf",
    width = 10, height = 4)
par(mfrow = c(1,3))
for (i in 1:length(mediation_results)) {
  plot(c(mediation_results[[i]][[1]][2],  mediation_results[[i]][[3]][2],
         mediation_results[[i]][[5]][2]), 1:3, ylab = "",
       main = mains[i],
       xlab = xlabs[i],
       yaxt = "n",
       xlim = c(-.08,.08),
       pch = 16, cex = 1)
  
  abline(v = 0, lty = 3)
  lines(c(mediation_results[[i]][[2]][1], mediation_results[[i]][[2]][3]), c(1,1) )
  lines(c(mediation_results[[i]][[4]][1], mediation_results[[i]][[4]][3]), c(2,2) )
  lines(c(mediation_results[[i]][[6]][1], mediation_results[[i]][[6]][3]), c(3,3) )
  
  lines(c(mediation_results[[i]][[1]][1], mediation_results[[i]][[1]][3]), c(1,1), lwd = 5,
        col = "grey")
  lines(c(mediation_results[[i]][[3]][1], mediation_results[[i]][[3]][3]), c(2,2), lwd = 5,
        col = "grey")
  lines(c(mediation_results[[i]][[5]][1], mediation_results[[i]][[5]][3]), c(3,3), lwd = 5,
        col = "grey")
  
  if (i == 1) {
    text(mediation_results[[i]][[3]][1]+.006, 2.97, "Mediation Effect \n of Arrests", col = "blue")
    text(mediation_results[[i]][[1]][2], 2.15, "Direct Effect \n of Inspection", col = "blue")
    text(mediation_results[[i]][[2]][2], 1.15, "Total Effect \n of Inspection", col = "blue")  
  } else if (i == 2) {
    text(mediation_results[[i]][[3]][1], 2.97, "Mediation Effect \n of Corruption Potential", col = "blue")
    text(mediation_results[[i]][[1]][2], 2.15, "Direct Effect \n of Inspection", col = "blue")
    text(mediation_results[[i]][[2]][2], 1.15, "Total Effect \n of Inspection", col = "blue")  
  } else if (i == 3) {
    text(mediation_results[[i]][[3]][1], 2.97, "Mediation Effect \n of Corruption Potential", col = "blue")
    text(mediation_results[[i]][[1]][2], 2.15, "Direct Effect \n of Inspection", col = "blue")
    text(mediation_results[[i]][[2]][2], 1.15, "Total Effect \n of Inspection", col = "blue")  
  }
  
  
  points(c(mediation_results[[i]][[1]][2],  mediation_results[[i]][[3]][2]
  ), 1:2, 
  pch = 19)  
  points(mediation_results[[i]][[5]][2], 3, pch = 20, cex = 0.6)
  
}
dev.off()

